library(knitr)
library(ggplot2)
library(dplyr)
library(ggrepel)
library(Biobase)
library(tibble)
library(annotables)
library(DESeq2)
library(fgsea)
library(DT)
library("BiocParallel")
register(MulticoreParam(4))

Gautam’s samples

Investigating liver samples

Transcriptional profile PCA and differential gene expression (DGE)

RNA-seq

RNA was isolated and sequenced from the various cell types.

Sample summary

sample_table <- read.delim('sample_index.txt', stringsAsFactors = FALSE)

datatable(sample_table) %>% 
  formatStyle("species", backgroundColor = styleEqual(c("Human","Mouse"), c("#EAD3BF","#AA9486"))) %>%
  formatStyle("condition", backgroundColor = styleEqual(c("CONTROL","LIVER FAILURE"), c("#9986A5","#79402E")))

Human

PCA

counts <- read.delim('human_counts', stringsAsFactors = FALSE)

human_coldata <- sample_table %>% dplyr::filter(species == "Human")
rownames(human_coldata) <-  human_coldata$sample_id
human_coldata$sample_id <- NULL
dds <- DESeqDataSetFromMatrix(counts, colData = human_coldata, design = ~condition)

my_vst <- vst(dds)
pcaData <- plotPCA(my_vst, intgroup ="condition",returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData,aes(PC1,PC2,colour = condition)) + geom_point(size = 3 ) +
        xlab(paste0("PC1: ",percentVar[1],"% variance")) +
        ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
          geom_label_repel(aes(label = name), size = 2,box.padding   = 0.1,point.padding = 0.1,
                   segment.color = 'grey50') +
        theme_minimal()

Genes driving PC1 and PC2

top_contribs = function(object,my_genome) {
  # calculate the variance for each gene
  rv <- rowVars(assay(object))

  # select the 1000 top genes by variance
  select <- order(rv, decreasing=TRUE)[seq_len(min(1000, length(rv)))]

  # perform a PCA on the data in assay(x) for the selected genes
  pca <- prcomp(t(assay(object)[select,]))

  # the contribution to the total variance for each component
  percentVar <- pca$sdev^2 / sum( pca$sdev^2 )

  # Top 20 contributers to PC1 PC2
  PCA1_contrib <- sort(abs(pca$rotation[,1]), decreasing = TRUE )[1:20]
  PCA2_contrib <- sort(abs(pca$rotation[,2]), decreasing = TRUE )[1:20]
  PCA_contrib <- c(PCA1_contrib, PCA2_contrib)
  PCA_contrib <- data.frame("ensgene" = names(PCA_contrib), PCA_contrib)
  PCA_contrib <- cbind(PCA_contrib, data.frame("PC" = c(rep("PC1",20),rep("PC2","20"))))
  PCA_contrib <- left_join(PCA_contrib, my_genome, by = "ensgene") %>% dplyr::select("PC","symbol", "biotype")
  return(PCA_contrib)
}

datatable(top_contribs(my_vst,grch38))

Differential Gene Expression

deseq <- DESeq(dds, parallel = TRUE)

res <- results(deseq, contrast = c("condition", "LIVER FAILURE", "CONTROL"))
resOrdered <- res[order(res$padj),]
resOrdered <- data.frame(resOrdered)
resOrdered <- left_join(rownames_to_column(resOrdered, var = "ensgene"), grch38, by = "ensgene") %>% 
  dplyr::select(symbol,log2FoldChange,padj,biotype)
 # for_DT <- resOrdered[1:2000,]

datatable(resOrdered, 
          extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('excel', 'pdf', 'print')
  ))

GSEA

hallmark <- gmtPathways("h.all.v6.2.symbols.gmt")

plot_gsea <- function(res){
res <- res[complete.cases(res),]
res$fcSign <- sign(res$log2FoldChange)
res$logP <- -log10(res$padj)
res$metric <- res$logP/res$fcSign
y<-res[,c("symbol", "metric")]
geneList <- y$metric
names(geneList) <- y$symbol
geneList <- geneList[order(geneList, decreasing = T)]
fgseaRes <- fgsea(hallmark, geneList, nperm=10000, maxSize = 500)
fgseaRes <- fgseaRes[fgseaRes$padj < 0.0005,]
plotGseaTable(hallmark[fgseaRes$pathway], geneList, fgseaRes, gseaParam = 0.4)
}

plot_gsea(resOrdered)

Mouse

PCA

counts <- read.delim('mouse_counts', stringsAsFactors = FALSE)

mouse_coldata <- sample_table %>% dplyr::filter(species == "Mouse")
rownames(mouse_coldata) <-  mouse_coldata$sample_id
mouse_coldata$sample_id <- NULL
dds <- DESeqDataSetFromMatrix(counts, colData = mouse_coldata, design = ~condition)

my_vst <- vst(dds)

pcaData <- DESeq2::plotPCA(my_vst, intgroup ="condition",returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData,aes(PC1,PC2,colour = condition)) + geom_point(size = 3 ) +
        xlab(paste0("PC1: ",percentVar[1],"% variance")) +
        ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
          geom_label_repel(aes(label = name), size = 2,box.padding   = 0.1,point.padding = 0.1,
                   segment.color = 'grey50') +
        theme_minimal()

Genes driving PC1 and PC2

datatable(top_contribs(my_vst,grcm38))

Differential Gene Expression

deseq <- DESeq(dds, parallel = TRUE)
res <- results(deseq, contrast = c("condition", "LIVER FAILURE", "CONTROL"))
resOrdered <- res[order(res$padj),]
resOrdered <- data.frame(resOrdered)
resOrdered <- left_join(rownames_to_column(resOrdered, var = "ensgene"), grcm38, by = "ensgene") %>% 
  dplyr::select(symbol,log2FoldChange,padj,biotype)
#for_DT <- resOrdered[1:2000,]

datatable(resOrdered, 
          extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('excel', 'pdf', 'print')
  ))
LS0tDQp0aXRsZTogIkV4cGxvcmluZyBIdW1hbiBQcmltYXJ5IFNhbXBsZXMiDQphdXRob3I6IDxhIGhyZWY9Imh0dHBzOi8vY2hlbGEtamFtZXMuZ2l0aHViLmlvLyI+IDxoMz4gQ2hlbGEgSmFtZXMgPC9oMz4gPC9hPiBcbmV3bGluZSBDYW5jZXIgSW5zdGl0dXRlLCBVQ0wsIFVLDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0KaHRtbHRvb2xzOjp0YWdMaXN0KHJtYXJrZG93bjo6aHRtbF9kZXBlbmRlbmN5X2ZvbnRfYXdlc29tZSgpKQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGV2YWwgPSBUUlVFLCBjYWNoZSA9IEZBTFNFLCBvdXQud2lkdGggPSAiMTAwJSIsDQogICAgICAgICAgICAgICAgICAgICAgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpDQpgYGANCg0KDQpgYGB7ciBsaWJyYXJpZXMgfQ0KbGlicmFyeShrbml0cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KGdncmVwZWwpDQpsaWJyYXJ5KEJpb2Jhc2UpDQpsaWJyYXJ5KHRpYmJsZSkNCmxpYnJhcnkoYW5ub3RhYmxlcykNCmxpYnJhcnkoREVTZXEyKQ0KbGlicmFyeShmZ3NlYSkNCmxpYnJhcnkoRFQpDQpsaWJyYXJ5KCJCaW9jUGFyYWxsZWwiKQ0KcmVnaXN0ZXIoTXVsdGljb3JlUGFyYW0oNCkpDQpgYGANCg0KIyBHYXV0YW0ncyBzYW1wbGVzIA0KDQpJbnZlc3RpZ2F0aW5nIGxpdmVyIHNhbXBsZXMNCg0KIyBUcmFuc2NyaXB0aW9uYWwgcHJvZmlsZSBQQ0EgYW5kIGRpZmZlcmVudGlhbCBnZW5lIGV4cHJlc3Npb24gKERHRSkNCg0KIyMgUk5BLXNlcQ0KDQpSTkEgd2FzIGlzb2xhdGVkIGFuZCBzZXF1ZW5jZWQgZnJvbSB0aGUgdmFyaW91cyBjZWxsIHR5cGVzLg0KDQojIyBTYW1wbGUgc3VtbWFyeQ0KDQpgYGB7cn0NCnNhbXBsZV90YWJsZSA8LSByZWFkLmRlbGltKCdzYW1wbGVfaW5kZXgudHh0Jywgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQ0KDQpkYXRhdGFibGUoc2FtcGxlX3RhYmxlKSAlPiUgDQogIGZvcm1hdFN0eWxlKCJzcGVjaWVzIiwgYmFja2dyb3VuZENvbG9yID0gc3R5bGVFcXVhbChjKCJIdW1hbiIsIk1vdXNlIiksIGMoIiNFQUQzQkYiLCIjQUE5NDg2IikpKSAlPiUNCiAgZm9ybWF0U3R5bGUoImNvbmRpdGlvbiIsIGJhY2tncm91bmRDb2xvciA9IHN0eWxlRXF1YWwoYygiQ09OVFJPTCIsIkxJVkVSIEZBSUxVUkUiKSwgYygiIzk5ODZBNSIsIiM3OTQwMkUiKSkpDQoNCmBgYA0KDQojIyBIdW1hbg0KIyMjIFBDQQ0KDQpgYGB7cn0NCmNvdW50cyA8LSByZWFkLmRlbGltKCdodW1hbl9jb3VudHMnLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQoNCmh1bWFuX2NvbGRhdGEgPC0gc2FtcGxlX3RhYmxlICU+JSBkcGx5cjo6ZmlsdGVyKHNwZWNpZXMgPT0gIkh1bWFuIikNCnJvd25hbWVzKGh1bWFuX2NvbGRhdGEpIDwtICBodW1hbl9jb2xkYXRhJHNhbXBsZV9pZA0KaHVtYW5fY29sZGF0YSRzYW1wbGVfaWQgPC0gTlVMTA0KZGRzIDwtIERFU2VxRGF0YVNldEZyb21NYXRyaXgoY291bnRzLCBjb2xEYXRhID0gaHVtYW5fY29sZGF0YSwgZGVzaWduID0gfmNvbmRpdGlvbikNCg0KbXlfdnN0IDwtIHZzdChkZHMpDQpwY2FEYXRhIDwtIHBsb3RQQ0EobXlfdnN0LCBpbnRncm91cCA9ImNvbmRpdGlvbiIscmV0dXJuRGF0YSA9IFRSVUUpDQpwZXJjZW50VmFyIDwtIHJvdW5kKDEwMCAqIGF0dHIocGNhRGF0YSwgInBlcmNlbnRWYXIiKSkNCg0KZ2dwbG90KHBjYURhdGEsYWVzKFBDMSxQQzIsY29sb3VyID0gY29uZGl0aW9uKSkgKyBnZW9tX3BvaW50KHNpemUgPSAzICkgKw0KICAgICAgICB4bGFiKHBhc3RlMCgiUEMxOiAiLHBlcmNlbnRWYXJbMV0sIiUgdmFyaWFuY2UiKSkgKw0KICAgICAgICB5bGFiKHBhc3RlMCgiUEMyOiAiLHBlcmNlbnRWYXJbMl0sIiUgdmFyaWFuY2UiKSkgKyANCiAgICAgICAgICBnZW9tX2xhYmVsX3JlcGVsKGFlcyhsYWJlbCA9IG5hbWUpLCBzaXplID0gMixib3gucGFkZGluZyAgID0gMC4xLHBvaW50LnBhZGRpbmcgPSAwLjEsDQogICAgICAgICAgICAgICAgICAgc2VnbWVudC5jb2xvciA9ICdncmV5NTAnKSArDQogICAgICAgIHRoZW1lX21pbmltYWwoKQ0KDQpgYGANCg0KIyMjIEdlbmVzIGRyaXZpbmcgUEMxIGFuZCBQQzINCg0KYGBge3J9DQp0b3BfY29udHJpYnMgPSBmdW5jdGlvbihvYmplY3QsbXlfZ2Vub21lKSB7DQogICMgY2FsY3VsYXRlIHRoZSB2YXJpYW5jZSBmb3IgZWFjaCBnZW5lDQogIHJ2IDwtIHJvd1ZhcnMoYXNzYXkob2JqZWN0KSkNCg0KICAjIHNlbGVjdCB0aGUgMTAwMCB0b3AgZ2VuZXMgYnkgdmFyaWFuY2UNCiAgc2VsZWN0IDwtIG9yZGVyKHJ2LCBkZWNyZWFzaW5nPVRSVUUpW3NlcV9sZW4obWluKDEwMDAsIGxlbmd0aChydikpKV0NCg0KICAjIHBlcmZvcm0gYSBQQ0Egb24gdGhlIGRhdGEgaW4gYXNzYXkoeCkgZm9yIHRoZSBzZWxlY3RlZCBnZW5lcw0KICBwY2EgPC0gcHJjb21wKHQoYXNzYXkob2JqZWN0KVtzZWxlY3QsXSkpDQoNCiAgIyB0aGUgY29udHJpYnV0aW9uIHRvIHRoZSB0b3RhbCB2YXJpYW5jZSBmb3IgZWFjaCBjb21wb25lbnQNCiAgcGVyY2VudFZhciA8LSBwY2Ekc2Rldl4yIC8gc3VtKCBwY2Ekc2Rldl4yICkNCg0KICAjIFRvcCAyMCBjb250cmlidXRlcnMgdG8gUEMxIFBDMg0KICBQQ0ExX2NvbnRyaWIgPC0gc29ydChhYnMocGNhJHJvdGF0aW9uWywxXSksIGRlY3JlYXNpbmcgPSBUUlVFIClbMToyMF0NCiAgUENBMl9jb250cmliIDwtIHNvcnQoYWJzKHBjYSRyb3RhdGlvblssMl0pLCBkZWNyZWFzaW5nID0gVFJVRSApWzE6MjBdDQogIFBDQV9jb250cmliIDwtIGMoUENBMV9jb250cmliLCBQQ0EyX2NvbnRyaWIpDQogIFBDQV9jb250cmliIDwtIGRhdGEuZnJhbWUoImVuc2dlbmUiID0gbmFtZXMoUENBX2NvbnRyaWIpLCBQQ0FfY29udHJpYikNCiAgUENBX2NvbnRyaWIgPC0gY2JpbmQoUENBX2NvbnRyaWIsIGRhdGEuZnJhbWUoIlBDIiA9IGMocmVwKCJQQzEiLDIwKSxyZXAoIlBDMiIsIjIwIikpKSkNCiAgUENBX2NvbnRyaWIgPC0gbGVmdF9qb2luKFBDQV9jb250cmliLCBteV9nZW5vbWUsIGJ5ID0gImVuc2dlbmUiKSAlPiUgZHBseXI6OnNlbGVjdCgiUEMiLCJzeW1ib2wiLCAiYmlvdHlwZSIpDQogIHJldHVybihQQ0FfY29udHJpYikNCn0NCg0KZGF0YXRhYmxlKHRvcF9jb250cmlicyhteV92c3QsZ3JjaDM4KSkNCg0KYGBgDQoNCiMjIyBEaWZmZXJlbnRpYWwgR2VuZSBFeHByZXNzaW9uDQoNCmBgYHtyfQ0KZGVzZXEgPC0gREVTZXEoZGRzLCBwYXJhbGxlbCA9IFRSVUUpDQoNCnJlcyA8LSByZXN1bHRzKGRlc2VxLCBjb250cmFzdCA9IGMoImNvbmRpdGlvbiIsICJMSVZFUiBGQUlMVVJFIiwgIkNPTlRST0wiKSkNCnJlc09yZGVyZWQgPC0gcmVzW29yZGVyKHJlcyRwYWRqKSxdDQpyZXNPcmRlcmVkIDwtIGRhdGEuZnJhbWUocmVzT3JkZXJlZCkNCnJlc09yZGVyZWQgPC0gbGVmdF9qb2luKHJvd25hbWVzX3RvX2NvbHVtbihyZXNPcmRlcmVkLCB2YXIgPSAiZW5zZ2VuZSIpLCBncmNoMzgsIGJ5ID0gImVuc2dlbmUiKSAlPiUgDQogIGRwbHlyOjpzZWxlY3Qoc3ltYm9sLGxvZzJGb2xkQ2hhbmdlLHBhZGosYmlvdHlwZSkNCiAjIGZvcl9EVCA8LSByZXNPcmRlcmVkWzE6MjAwMCxdDQoNCmRhdGF0YWJsZShyZXNPcmRlcmVkLCANCiAgICAgICAgICBleHRlbnNpb25zID0gJ0J1dHRvbnMnLCBvcHRpb25zID0gbGlzdCgNCiAgICBkb20gPSAnQmZydGlwJywNCiAgICBidXR0b25zID0gYygnZXhjZWwnLCAncGRmJywgJ3ByaW50JykNCiAgKSkNCmBgYA0KDQojIyMgR1NFQQ0KDQpgYGB7cn0NCg0KaGFsbG1hcmsgPC0gZ210UGF0aHdheXMoImguYWxsLnY2LjIuc3ltYm9scy5nbXQiKQ0KDQpwbG90X2dzZWEgPC0gZnVuY3Rpb24ocmVzKXsNCnJlcyA8LSByZXNbY29tcGxldGUuY2FzZXMocmVzKSxdDQpyZXMkZmNTaWduIDwtIHNpZ24ocmVzJGxvZzJGb2xkQ2hhbmdlKQ0KcmVzJGxvZ1AgPC0gLWxvZzEwKHJlcyRwYWRqKQ0KcmVzJG1ldHJpYyA8LSByZXMkbG9nUC9yZXMkZmNTaWduDQp5PC1yZXNbLGMoInN5bWJvbCIsICJtZXRyaWMiKV0NCmdlbmVMaXN0IDwtIHkkbWV0cmljDQpuYW1lcyhnZW5lTGlzdCkgPC0geSRzeW1ib2wNCmdlbmVMaXN0IDwtIGdlbmVMaXN0W29yZGVyKGdlbmVMaXN0LCBkZWNyZWFzaW5nID0gVCldDQpmZ3NlYVJlcyA8LSBmZ3NlYShoYWxsbWFyaywgZ2VuZUxpc3QsIG5wZXJtPTEwMDAwLCBtYXhTaXplID0gNTAwKQ0KZmdzZWFSZXMgPC0gZmdzZWFSZXNbZmdzZWFSZXMkcGFkaiA8IDAuMDAwNSxdDQpwbG90R3NlYVRhYmxlKGhhbGxtYXJrW2Znc2VhUmVzJHBhdGh3YXldLCBnZW5lTGlzdCwgZmdzZWFSZXMsIGdzZWFQYXJhbSA9IDAuNCkNCn0NCg0KcGxvdF9nc2VhKHJlc09yZGVyZWQpDQpgYGANCg0KIyMgTW91c2UNCiMjIyBQQ0ENCg0KYGBge3J9DQpjb3VudHMgPC0gcmVhZC5kZWxpbSgnbW91c2VfY291bnRzJywgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQ0KDQptb3VzZV9jb2xkYXRhIDwtIHNhbXBsZV90YWJsZSAlPiUgZHBseXI6OmZpbHRlcihzcGVjaWVzID09ICJNb3VzZSIpDQpyb3duYW1lcyhtb3VzZV9jb2xkYXRhKSA8LSAgbW91c2VfY29sZGF0YSRzYW1wbGVfaWQNCm1vdXNlX2NvbGRhdGEkc2FtcGxlX2lkIDwtIE5VTEwNCmRkcyA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50cywgY29sRGF0YSA9IG1vdXNlX2NvbGRhdGEsIGRlc2lnbiA9IH5jb25kaXRpb24pDQoNCm15X3ZzdCA8LSB2c3QoZGRzKQ0KDQpwY2FEYXRhIDwtIERFU2VxMjo6cGxvdFBDQShteV92c3QsIGludGdyb3VwID0iY29uZGl0aW9uIixyZXR1cm5EYXRhID0gVFJVRSkNCnBlcmNlbnRWYXIgPC0gcm91bmQoMTAwICogYXR0cihwY2FEYXRhLCAicGVyY2VudFZhciIpKQ0KDQpnZ3Bsb3QocGNhRGF0YSxhZXMoUEMxLFBDMixjb2xvdXIgPSBjb25kaXRpb24pKSArIGdlb21fcG9pbnQoc2l6ZSA9IDMgKSArDQogICAgICAgIHhsYWIocGFzdGUwKCJQQzE6ICIscGVyY2VudFZhclsxXSwiJSB2YXJpYW5jZSIpKSArDQogICAgICAgIHlsYWIocGFzdGUwKCJQQzI6ICIscGVyY2VudFZhclsyXSwiJSB2YXJpYW5jZSIpKSArIA0KICAgICAgICAgIGdlb21fbGFiZWxfcmVwZWwoYWVzKGxhYmVsID0gbmFtZSksIHNpemUgPSAyLGJveC5wYWRkaW5nICAgPSAwLjEscG9pbnQucGFkZGluZyA9IDAuMSwNCiAgICAgICAgICAgICAgICAgICBzZWdtZW50LmNvbG9yID0gJ2dyZXk1MCcpICsNCiAgICAgICAgdGhlbWVfbWluaW1hbCgpDQoNCmBgYA0KDQojIyMgR2VuZXMgZHJpdmluZyBQQzEgYW5kIFBDMg0KDQpgYGB7cn0NCmRhdGF0YWJsZSh0b3BfY29udHJpYnMobXlfdnN0LGdyY20zOCkpDQpgYGANCg0KIyMjIERpZmZlcmVudGlhbCBHZW5lIEV4cHJlc3Npb24NCg0KDQpgYGB7cn0NCmRlc2VxIDwtIERFU2VxKGRkcywgcGFyYWxsZWwgPSBUUlVFKQ0KcmVzIDwtIHJlc3VsdHMoZGVzZXEsIGNvbnRyYXN0ID0gYygiY29uZGl0aW9uIiwgIkxJVkVSIEZBSUxVUkUiLCAiQ09OVFJPTCIpKQ0KcmVzT3JkZXJlZCA8LSByZXNbb3JkZXIocmVzJHBhZGopLF0NCnJlc09yZGVyZWQgPC0gZGF0YS5mcmFtZShyZXNPcmRlcmVkKQ0KcmVzT3JkZXJlZCA8LSBsZWZ0X2pvaW4ocm93bmFtZXNfdG9fY29sdW1uKHJlc09yZGVyZWQsIHZhciA9ICJlbnNnZW5lIiksIGdyY20zOCwgYnkgPSAiZW5zZ2VuZSIpICU+JSANCiAgZHBseXI6OnNlbGVjdChzeW1ib2wsbG9nMkZvbGRDaGFuZ2UscGFkaixiaW90eXBlKQ0KI2Zvcl9EVCA8LSByZXNPcmRlcmVkWzE6MjAwMCxdDQoNCmRhdGF0YWJsZShyZXNPcmRlcmVkLCANCiAgICAgICAgICBleHRlbnNpb25zID0gJ0J1dHRvbnMnLCBvcHRpb25zID0gbGlzdCgNCiAgICBkb20gPSAnQmZydGlwJywNCiAgICBidXR0b25zID0gYygnZXhjZWwnLCAncGRmJywgJ3ByaW50JykNCiAgKSkNCmBgYA0KDQoNCg==